{
 "cells": [
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-17T09:47:18.824938Z",
     "start_time": "2024-11-17T09:47:18.820007Z"
    }
   },
   "cell_type": "code",
   "source": [
    "%env LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1\n",
    "%env LLM_API_KEY=替换为自己的Qwen API Key，如果不需要评估，则不需要"
   ],
   "id": "7902ebff7ecff6c9",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "env: LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1\n",
      "env: LLM_API_KEY=替换为自己的Qwen API Key，如果不需要评估，则不需要\n"
     ]
    }
   ],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0dc2dd8a-ca1f-44b1-95cb-68d21a407fe0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T08:50:11.473481Z",
     "iopub.status.busy": "2024-11-17T08:50:11.471576Z",
     "iopub.status.idle": "2024-11-17T08:50:11.701643Z",
     "shell.execute_reply": "2024-11-17T08:50:11.701184Z",
     "shell.execute_reply.started": "2024-11-17T08:50:11.473405Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "318a43db-8b14-455c-b668-3fe1c446214c",
   "metadata": {},
   "source": [
    "# 加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "81a68698-5063-4302-9524-0c30759813e0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T08:50:30.260210Z",
     "iopub.status.busy": "2024-11-17T08:50:30.259948Z",
     "iopub.status.idle": "2024-11-17T08:50:30.263619Z",
     "shell.execute_reply": "2024-11-17T08:50:30.263022Z",
     "shell.execute_reply.started": "2024-11-17T08:50:30.260192Z"
    }
   },
   "outputs": [],
   "source": [
    "# Experiment identifier; it is also the last component of expr_dir below.\n",
    "expr_version = 'product_v01_flowise_basic_rag'\n",
    "\n",
    "# Both paths are relative and start one level above the current directory.\n",
    "preprocess_output_dir = os.path.join(os.pardir, 'outputs', 'v1_20240713')\n",
    "expr_dir = os.path.join(os.pardir, 'experiments', expr_version)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c47b7af8-b390-420a-8a72-22e52c19861b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T08:50:33.634644Z",
     "iopub.status.busy": "2024-11-17T08:50:33.633873Z",
     "iopub.status.idle": "2024-11-17T08:50:33.747212Z",
     "shell.execute_reply": "2024-11-17T08:50:33.746742Z",
     "shell.execute_reply.started": "2024-11-17T08:50:33.634574Z"
    }
   },
   "outputs": [],
   "source": [
    "qa_df = pd.read_excel(os.path.join(preprocess_output_dir, 'question_answer.xlsx'))\n",
    "\n",
    "# Keep only the rows of the test split and expose the `answer` column as `ref_answer`.\n",
    "test_mask = qa_df['dataset'] == 'test'\n",
    "prediction_df = (\n",
    "    qa_df.loc[test_mask, ['uuid', 'question', 'qa_type', 'answer']]\n",
    "    .rename(columns={'answer': 'ref_answer'})\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eaf2c9aa-a597-47eb-9f6d-6e37ae48d55d",
   "metadata": {},
   "source": [
    "# 调用Flowise API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "9255751c-9441-497f-b502-27fd362b6471",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T08:51:25.110422Z",
     "iopub.status.busy": "2024-11-17T08:51:25.109661Z",
     "iopub.status.idle": "2024-11-17T08:51:25.147850Z",
     "shell.execute_reply": "2024-11-17T08:51:25.147362Z",
     "shell.execute_reply.started": "2024-11-17T08:51:25.110352Z"
    }
   },
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "API_URL = \"http://192.168.31.92:3000/api/v1/prediction/8e7a0311-69be-4fee-979c-d57ee3726ceb\"\n",
    "def rag(question, timeout=120):\n",
    "    \"\"\"Send one question to the Flowise prediction endpoint and return its JSON reply.\n",
    "\n",
    "    :param question: question text, sent as the ``question`` field of the JSON payload\n",
    "    :param timeout: seconds to wait for the server before ``requests`` raises a timeout error\n",
    "    :return: the decoded JSON body of the response\n",
    "    :raises requests.HTTPError: if the server answers with a 4xx/5xx status code\n",
    "    \"\"\"\n",
    "    payload = {\n",
    "        \"question\": question\n",
    "    }\n",
    "    # Without a timeout, requests waits indefinitely on a server that never answers.\n",
    "    response = requests.post(API_URL, json=payload, timeout=timeout)\n",
    "    # Fail loudly on HTTP errors instead of handing an error payload to the caller.\n",
    "    response.raise_for_status()\n",
    "    return response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "810bb385-442f-4c0f-a905-5c693e47709c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T09:44:12.467527Z",
     "iopub.status.busy": "2024-11-17T09:44:12.467266Z",
     "iopub.status.idle": "2024-11-17T09:44:17.386368Z",
     "shell.execute_reply": "2024-11-17T09:44:17.385935Z",
     "shell.execute_reply.started": "2024-11-17T09:44:12.467505Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'text': '报告日期：2023年12月12日。',\n",
       " 'question': '这份报告的发布时间是什么时候',\n",
       " 'chatId': '21ab3135-0ecd-404f-af8c-95d2a37d99ed',\n",
       " 'chatMessageId': '01ac5f18-447a-4553-84b6-0fb2b1462ad6',\n",
       " 'isStreamValid': False,\n",
       " 'sessionId': '21ab3135-0ecd-404f-af8c-95d2a37d99ed'}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rag('这份报告的发布时间是什么时候')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6c18999-b35d-4b71-93df-5bd71ba6d82a",
   "metadata": {},
   "source": [
    "# 批量预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e133fcfc-d0d1-4ffb-8818-436a5555ea51",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T08:52:30.885370Z",
     "iopub.status.busy": "2024-11-17T08:52:30.885123Z",
     "iopub.status.idle": "2024-11-17T09:02:57.339218Z",
     "shell.execute_reply": "2024-11-17T09:02:57.338744Z",
     "shell.execute_reply.started": "2024-11-17T08:52:30.885349Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d91ee0c0db24432b9fb603e1031fe503",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from tqdm.auto import tqdm\n",
    "\n",
    "answer_dict = {}\n",
    "for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "    question = row['question']\n",
    "    # Ask the RAG endpoint, then file its answer under the question text.\n",
    "    raw_resp = rag(question)\n",
    "    answer_dict[question] = dict(\n",
    "        uuid=row['uuid'],\n",
    "        ref_answer=row['ref_answer'],\n",
    "        gen_answer=raw_resp['text'],\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7416c321-1b70-47ee-9968-d1fac2af53c4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T09:11:16.855901Z",
     "iopub.status.busy": "2024-11-17T09:11:16.855132Z",
     "iopub.status.idle": "2024-11-17T09:11:16.862112Z",
     "shell.execute_reply": "2024-11-17T09:11:16.861696Z",
     "shell.execute_reply.started": "2024-11-17T09:11:16.855830Z"
    }
   },
   "outputs": [],
   "source": [
    "# Look up each question's generated answer in answer_dict and store it as the gen_answer column.\n",
    "gen_answers = prediction_df['question'].map(lambda q: answer_dict[q]['gen_answer'])\n",
    "prediction_df.loc[:, 'gen_answer'] = gen_answers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9e543968-c408-4604-9cbc-f80c17994ac4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T09:11:21.875031Z",
     "iopub.status.busy": "2024-11-17T09:11:21.874241Z",
     "iopub.status.idle": "2024-11-17T09:11:21.883818Z",
     "shell.execute_reply": "2024-11-17T09:11:21.883426Z",
     "shell.execute_reply.started": "2024-11-17T09:11:21.874958Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uuid</th>\n",
       "      <th>question</th>\n",
       "      <th>qa_type</th>\n",
       "      <th>ref_answer</th>\n",
       "      <th>gen_answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>232</th>\n",
       "      <td>50f4fb19-9c21-4f0c-babb-2ddb1c33a6d6</td>\n",
       "      <td>公司债的发行规模变化如何？</td>\n",
       "      <td>detailed</td>\n",
       "      <td>上升了2.5%</td>\n",
       "      <td>2023年前10个月，美国公司债发行规模呈现上升态势，增幅为2.5%。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>e73a0c9d-d42b-4350-a4c3-b38bf67c68a5</td>\n",
       "      <td>如何联系报告的研究小组？</td>\n",
       "      <td>detailed</td>\n",
       "      <td>联系人王有鑫，电话010-66594127，或发送邮件至wangyouxin_hq@bank...</td>\n",
       "      <td>您可以拨打电话010-66594127或发送邮件至wangyouxin_hq@bank-of...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>312</th>\n",
       "      <td>c47057d6-35c0-4466-bafd-4ed036030c1d</td>\n",
       "      <td>美国单一家庭房贷整体拖欠率在2023年二季度达到多少？</td>\n",
       "      <td>detailed</td>\n",
       "      <td>1.7%</td>\n",
       "      <td>美国单一家庭房贷整体拖欠率在2023年二季度达到1.7%。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>0943b71a-7314-4cef-a7d9-682c7ddc9d7f</td>\n",
       "      <td>目前情况下，哪种可能性更高，美国经济软着陆还是衰退？</td>\n",
       "      <td>detailed</td>\n",
       "      <td>“软着陆”发生的可能性要高于衰退</td>\n",
       "      <td>从目前情况看，“软着陆”发生的可能性要高于衰退。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>774ab320-aba8-45a7-a5f4-791782b46d08</td>\n",
       "      <td>哪些因素影响了欧元区和英国的经济增长？</td>\n",
       "      <td>detailed</td>\n",
       "      <td>就业市场韧性消退、内外部需求回落、通胀和利率水平高企、地缘冲突余波未散</td>\n",
       "      <td>欧洲央行收紧货币政策、通胀高企抑制消费能力、疫情放开的促进效应逐步消减以及全球经济放缓导致出...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     uuid                     question  \\\n",
       "232  50f4fb19-9c21-4f0c-babb-2ddb1c33a6d6                公司债的发行规模变化如何？   \n",
       "7    e73a0c9d-d42b-4350-a4c3-b38bf67c68a5                 如何联系报告的研究小组？   \n",
       "312  c47057d6-35c0-4466-bafd-4ed036030c1d  美国单一家庭房贷整体拖欠率在2023年二季度达到多少？   \n",
       "80   0943b71a-7314-4cef-a7d9-682c7ddc9d7f   目前情况下，哪种可能性更高，美国经济软着陆还是衰退？   \n",
       "60   774ab320-aba8-45a7-a5f4-791782b46d08          哪些因素影响了欧元区和英国的经济增长？   \n",
       "\n",
       "      qa_type                                         ref_answer  \\\n",
       "232  detailed                                            上升了2.5%   \n",
       "7    detailed  联系人王有鑫，电话010-66594127，或发送邮件至wangyouxin_hq@bank...   \n",
       "312  detailed                                               1.7%   \n",
       "80   detailed                                   “软着陆”发生的可能性要高于衰退   \n",
       "60   detailed                就业市场韧性消退、内外部需求回落、通胀和利率水平高企、地缘冲突余波未散   \n",
       "\n",
       "                                            gen_answer  \n",
       "232                2023年前10个月，美国公司债发行规模呈现上升态势，增幅为2.5%。  \n",
       "7    您可以拨打电话010-66594127或发送邮件至wangyouxin_hq@bank-of...  \n",
       "312                      美国单一家庭房贷整体拖欠率在2023年二季度达到1.7%。  \n",
       "80                            从目前情况看，“软着陆”发生的可能性要高于衰退。  \n",
       "60   欧洲央行收紧货币政策、通胀高企抑制消费能力、疫情放开的促进效应逐步消减以及全球经济放缓导致出...  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction_df.sample(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "326687c0-0a32-4d51-a846-b164ed11c1c6",
   "metadata": {},
   "source": [
    "# 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "72104fef-3eb4-45cb-b449-c8dc71281016",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T09:11:27.762824Z",
     "iopub.status.busy": "2024-11-17T09:11:27.762063Z",
     "iopub.status.idle": "2024-11-17T09:11:27.778007Z",
     "shell.execute_reply": "2024-11-17T09:11:27.777548Z",
     "shell.execute_reply.started": "2024-11-17T09:11:27.762755Z"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "# Chat model used as the grader in evaluate(); API key and endpoint are read from the environment.\n",
    "judge_llm = ChatOpenAI(\n",
    "    api_key=os.environ['LLM_API_KEY'],\n",
    "    base_url=os.environ['LLM_BASE_URL'],\n",
    "    model_name='qwen2-72b-instruct',\n",
    "    temperature=0  # presumably chosen to keep verdicts as repeatable as possible\n",
    ")\n",
    "\n",
    "import time\n",
    "\n",
    "def evaluate(prediction_df, sleep_seconds=1):\n",
    "    \"\"\"\n",
    "    Grade every prediction by asking the judge LLM whether it matches the reference answer.\n",
    "\n",
    "    :param prediction_df: predictions; must contain the columns question, ref_answer and gen_answer\n",
    "    :param sleep_seconds: pause after each judge call, in seconds (default 1)\n",
    "    :return: list with the judge model's reply for each row, in row order;\n",
    "             string replies have surrounding whitespace removed\n",
    "    \"\"\"\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个经济学博士，现在我有一系列问题，有一个助手已经对这些问题进行了回答，你需要参照参考答案，评价这个助手的回答是否正确，仅回复“是”或“否”即可，不要带其他描述性内容或无关信息。\n",
    "问题：\n",
    "<question>\n",
    "{{question}}\n",
    "</question>\n",
    "\n",
    "参考答案：\n",
    "<ref_answer>\n",
    "{{ref_answer}}\n",
    "</ref_answer>\n",
    "\n",
    "助手回答：\n",
    "<gen_answer>\n",
    "{{gen_answer}}\n",
    "</gen_answer>\n",
    "请评价：\n",
    "    \"\"\"\n",
    "    results = []\n",
    "\n",
    "    for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        # Cast every field to str so that a non-string cell (e.g. a number or a missing value)\n",
    "        # cannot make str.replace() raise TypeError.\n",
    "        question = str(row['question'])\n",
    "        ref_answer = str(row['ref_answer'])\n",
    "        gen_answer = str(row['gen_answer'])\n",
    "\n",
    "        prompt = (\n",
    "            prompt_tmpl\n",
    "            .replace('{{question}}', question)\n",
    "            .replace('{{ref_answer}}', ref_answer)\n",
    "            .replace('{{gen_answer}}', gen_answer)\n",
    "            .strip()\n",
    "        )\n",
    "        result = judge_llm.invoke(prompt).content\n",
    "        # The prompt asks for a bare verdict; trim stray whitespace so exact comparisons on it work.\n",
    "        if isinstance(result, str):\n",
    "            result = result.strip()\n",
    "        results.append(result)\n",
    "\n",
    "        # Pause between judge calls.\n",
    "        time.sleep(sleep_seconds)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "6d684e34-8c42-455e-9d2d-fe05a044fbba",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T09:11:27.982089Z",
     "iopub.status.busy": "2024-11-17T09:11:27.980682Z",
     "iopub.status.idle": "2024-11-17T09:13:52.009750Z",
     "shell.execute_reply": "2024-11-17T09:13:52.009175Z",
     "shell.execute_reply.started": "2024-11-17T09:11:27.982009Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9363d119f7ee4ef0a3a4536b5f5bd156",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "prediction_df['raw_score'] = evaluate(prediction_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e4adb904-42e9-4735-b6a0-90c34207bbc8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T09:13:52.010848Z",
     "iopub.status.busy": "2024-11-17T09:13:52.010665Z",
     "iopub.status.idle": "2024-11-17T09:13:52.014019Z",
     "shell.execute_reply": "2024-11-17T09:13:52.013687Z",
     "shell.execute_reply.started": "2024-11-17T09:13:52.010834Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['是', '否'], dtype=object)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction_df['raw_score'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "ef330e4a-cd66-409d-9233-0b0aeb1a02b4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T09:13:52.014698Z",
     "iopub.status.busy": "2024-11-17T09:13:52.014468Z",
     "iopub.status.idle": "2024-11-17T09:13:52.025719Z",
     "shell.execute_reply": "2024-11-17T09:13:52.025273Z",
     "shell.execute_reply.started": "2024-11-17T09:13:52.014684Z"
    }
   },
   "outputs": [],
   "source": [
    "# 1 where the judge replied exactly '是', 0 for any other reply.\n",
    "prediction_df['score'] = prediction_df['raw_score'].eq('是').astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "3b1ea28f-bfac-41d0-a2bb-706bfb403b05",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T09:13:52.026675Z",
     "iopub.status.busy": "2024-11-17T09:13:52.026491Z",
     "iopub.status.idle": "2024-11-17T09:13:52.032008Z",
     "shell.execute_reply": "2024-11-17T09:13:52.031664Z",
     "shell.execute_reply.started": "2024-11-17T09:13:52.026660Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.82"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction_df['score'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1eb91d3-cc18-48a3-be05-3c5cbb6beffb",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
